package edu.pitt.cs.zhangfan.sa.core;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;

import edu.pitt.cs.zhangfan.sa.io.ExcelReader;
import edu.pitt.cs.zhangfan.sa.model.DocumentPair;
import edu.pitt.cs.zhangfan.sa.model.Sentence;

/**
 * Prints simple corpus statistics (document, sentence and word counts) for a
 * folder of document-pair files read through {@link ExcelReader}.
 */
public class DataStatistics {
	/**
	 * Reads every entry of the given folder as a {@link DocumentPair} and
	 * prints to standard output: the number of documents, the total and
	 * per-document average sentence counts of both sides of the pairs
	 * (D1 = {@code getSrc()}, D2 = {@code getModified()}), and the average
	 * number of words per sentence on each side.
	 *
	 * <p>Averages are printed as {@code NaN} when the folder contains no
	 * documents or a side contains no sentences.
	 *
	 * @param filePath path of the folder whose entries are passed to
	 *                 {@code ExcelReader.readDocs}
	 * @throws IOException if the folder cannot be listed (it does not exist,
	 *                     is not a directory, or an I/O error occurs), or if
	 *                     reading one of its entries fails
	 */
	public void stat(String filePath) throws IOException {
		System.out.println("Stats of folder:"+filePath);

		File folder = new File(filePath);
		File[] subs = folder.listFiles();
		// listFiles() returns null instead of throwing when the path is not a
		// readable directory; fail with a clear message rather than an NPE.
		if (subs == null) {
			throw new IOException("Cannot list folder (not a directory or I/O error): " + filePath);
		}

		ExcelReader reader = new ExcelReader();
		int numOfDocs = 0;
		int d1SentNum = 0;
		int d2SentNum = 0;
		int d1WordNum = 0;
		int d2WordNum = 0;

		for (File sub : subs) {
			DocumentPair pair = reader.readDocs(sub.getAbsolutePath());
			numOfDocs++;

			ArrayList<Sentence> d1Sents = pair.getSrc().getSentences();
			ArrayList<Sentence> d2Sents = pair.getModified().getSentences();
			d1SentNum += d1Sents.size();
			d2SentNum += d2Sents.size();

			for (Sentence sentence : d1Sents) {
				d1WordNum += countWords(sentence.getContent());
			}
			for (Sentence sentence : d2Sents) {
				d2WordNum += countWords(sentence.getContent());
			}
		}

		System.out.println("#Documents: "+numOfDocs);
		System.out.println("#D1 Sent:" + d1SentNum);
		System.out.println("#D2 Sent:" + d2SentNum);
		// Multiplying by 1.0 forces floating-point division.
		double avgD1S = (d1SentNum * 1.0) / numOfDocs;
		double avgD2S = (d2SentNum * 1.0) / numOfDocs;
		System.out.println("#Avg D1 sent in Doc:" + avgD1S);
		System.out.println("#Avg D2 sent in Doc:" + avgD2S);

		double avgWordNum1 = (d1WordNum * 1.0) / d1SentNum;
		double avgWordNum2 = (d2WordNum * 1.0) / d2SentNum;

		System.out.println("#Avg word in a sent, D1:"+avgWordNum1);
		System.out.println("#Avg word in a sent, D2:"+avgWordNum2);
	}

	/**
	 * Counts the whitespace-separated tokens in a sentence.
	 *
	 * <p>A plain {@code split(" ")} reports one word for an empty string and
	 * counts an extra empty token for every repeated or leading space, so the
	 * text is trimmed and split on runs of whitespace instead.
	 *
	 * @param content sentence text; may be {@code null}
	 * @return number of words, {@code 0} for {@code null} or blank text
	 */
	private static int countWords(String content) {
		if (content == null) {
			return 0;
		}
		String trimmed = content.trim();
		if (trimmed.isEmpty()) {
			return 0;
		}
		return trimmed.split("\\s+").length;
	}

	/**
	 * Prints statistics for each folder given on the command line. When no
	 * arguments are supplied, falls back to the two built-in data folders.
	 *
	 * @param args folder paths to analyse; may be empty
	 * @throws IOException if any folder cannot be listed or read
	 */
	public static void main(String[] args) throws IOException {
		DataStatistics ds = new DataStatistics();
		if (args != null && args.length > 0) {
			for (String path : args) {
				ds.stat(path);
			}
			return;
		}

		String c1Path = "E:\\independent study\\Revision\\all_data\\class1";
		String c2Path = "E:\\independent study\\Revision\\all_data\\class2";

		ds.stat(c1Path);
		ds.stat(c2Path);
	}
}
